from lxml import etree

# Parse the local HTML file with lxml's HTML parser.
parser = etree.HTMLParser()
html = etree.parse('./test.html', parser)

# Serialize the parsed tree to bytes, then print it as UTF-8 text.
result = etree.tostring(html)
print(str(result, 'utf-8'))

# Visual separator between the document dump and whatever is printed next.
print('======================================\n')


# XPath reference queries against the parsed tree `html`.
# Only the final query is active; the rest are kept commented out as
# alternatives that can be enabled one at a time.
#
# Every node in the document:
#   res = html.xpath('//*')
#   print(res)
#
# Every <li> at any depth (children and deeper descendants):
#   res = html.xpath('//li')
#   print(res)
#
# <a> elements that are direct children of any <li>:
#   res = html.xpath('//li/a')
#   print(res)
#
# <a> elements anywhere beneath any <li>:
#   res = html.xpath('//li//a')
#   print(res)
#
# Parent node: class attribute of the parent of the <a> whose href is
# "link4.html" (two equivalent spellings):
#   result = html.xpath('//a[@href="link4.html"]/../@class')
#   result = html.xpath('//a[@href="link4.html"]/parent::*/@class')
#   print(result)
#
# Select specific <li> elements by their class attribute:
#   res = html.xpath('//li[@class="item-0"]')
#   print(res)
#
# Text variants for the same <li> selection:
#   res = html.xpath('//li[@class="item-0"]/text()')     # text directly in <li>
#   res = html.xpath('//li[@class="item-0"]/a/text()')   # text of child <a>

# Active query: all text nodes at any depth under <li class="item-0">.
item_text_query = '//li[@class="item-0"]//text()'
res = html.xpath(item_text_query)
print(res)